# delimit ;
* from here on every command, and every comment that starts with a star, ends at a semicolon ;
clear ;
set more off ;
version 12.0 ;

* the relative paths used below for data, code and output are resolved from this folder ;
cd "replication" ;

* ***************************************************************************** ;
* Carpena, Fenella, and Bilal Zia. "The causal mechanism of financial education: 
* Evidence from mediation analysis." Journal of Economic Behavior & Organization 
* 177 (2020): 143-184.
* 
* This do-file creates Tables 1-8 of the paper
* ***************************************************************************** ;

* start clean: remove programs and stored estimates left in memory ;
program drop _all ;
est drop _all ;
* append the code subfolder of the current working directory to the ado search path ;
* NOTE(review): medeff_FC, called for Tables 5-8, is not defined in this file and is presumably found there - confirm ;
adopath + "`c(pwd)'/code/" ;

* ***************************************************************************** ;
* get endline data
* ***************************************************************************** ;

* the endline file is the master dataset ;
use "./data/endline.dta", clear ;

* attach the baseline file by id and show how many ids matched ;
merge 1:1 id using "./data/baseline.dta" ;
tabulate _merge ;

* replace the variable label stored for savings_bank ;
label variable savings_bank "has bank savings account" ;

* ***************************************************************************** ;
* sample selection
* ***************************************************************************** ;

* keep matched and endline-only rows, that is, discard ids found only in the baseline file ;
keep if _merge != 2 ;
drop _merge ;

* discard rows in which every item from numer_fin_return through attit_budget is missing ;
* the row mean is missing only in that case ;
egen temp_knowl = rowmean(numer_fin_return-attit_budget) ;
drop if missing(temp_knowl) ;
drop temp_knowl ;

* retain only ids that also appear in the short-term financial knowledge file ;
* keepusing(id) means no variables are brought in, the merge is used purely as a filter ;
merge 1:1 id using "./data/financial-knowledge-short-term", keepusing(id) ;
tabulate _merge ;
drop if _merge != 3 ;
drop _merge ;

* the discount rate is a control variable in every regression below, so rows without it are removed ;
drop if disc_rate == . ;

* ***************************************************************************** ;
* create financial knowledge scores
* ***************************************************************************** ;

* each score is the row mean over a run of adjacent item variables ;
* the runs are given as ranges, so the result depends on the variable order in the dataset ;
egen end_numeracy = rowmean(numer_fin_return-numer_int_rate) ;
egen end_awareness = rowmean(aware_budget-aware_unprod) ;
egen end_attitudes = rowmean(attit_suggest_ins-attit_budget) ;

* ***************************************************************************** ;
* create treatment dummies
* ***************************************************************************** ;

* one indicator per combination of the three assignment variables ;
* each is 1 when all three conditions hold and 0 otherwise ;
generate flcounsgoal = fin_ed_treatment == 1 & couns_treatment == 1 & goal_treatment == 1 ;
generate flcouns     = fin_ed_treatment == 1 & couns_treatment == 1 & goal_treatment == 0 ;
generate flgoal      = fin_ed_treatment == 1 & couns_treatment == 0 & goal_treatment == 1 ;
generate fl          = fin_ed_treatment == 1 & couns_treatment == 0 & goal_treatment == 0 ;
generate control     = fin_ed_treatment == 0 & couns_treatment == 0 & goal_treatment == 0 ;

* **************************************************************************** ;
* TABLE 1: experimental design
* ***************************************************************************** ;

* all tables are written to this one CSV file ;
local csv "./output/table-01-08.csv" ;

* the first write uses a single redirect, which replaces any earlier copy of the file ;
shell printf "********** Table 1 **********\n" > "`csv'" ;
shell printf "Group, N\n" >> "`csv'" ;

* one line per experimental group: count its rows, then append the label and the count ;
count if control ;
shell printf "Control, `r(N)'\n" >> "`csv'" ;

count if fl ;
shell printf "FL Only, `r(N)'\n" >> "`csv'" ;

count if flgoal ;
shell printf "FL Goal, `r(N)'\n" >> "`csv'" ;

count if flcouns ;
shell printf "FL Couns, `r(N)'\n" >> "`csv'" ;

* the last line carries an extra newline to separate Table 1 from Table 2 ;
count if flcounsgoal ;
shell printf "FL Couns Goal, `r(N)'\n\n" >> "`csv'" ;

* ***************************************************************************** ;
* TABLE 2: baseline summary statistics
* ***************************************************************************** ;

* move the baseline covariates to the front, in this order, so that the range ;
* hhsize-finscore used below covers exactly these variables ;
order hhsize hhincome hhincome_pc hh_has_phone hh_has_water_connexn hh_has_nonfarm
	female hindu completed_secondary mficlient
	has_hard_time_saving interested_in_financial inconsistent disc_rate riskaverse
	mathscore finscore ;

* median, mean and standard deviation of each covariate, with the results saved in r(StatTotal) ;
tabstat hhsize-finscore, statistics(median mean sd) columns(statistics) format(%9.2f) longstub save ;

* transpose the saved matrix so that rows are variables and columns are the three statistics ;
matrix summstats = r(StatTotal)' ;

* balance check: for each covariate, jointly test the four treatment dummies and ;
* stack the p-values, rounded to three decimals, into the column vector pvals ;
local first_row = 1 ;
foreach v of varlist hhsize-finscore { ;

	areg `v' fl flgoal flcouns flcounsgoal, absorb(strata) cluster(wave_class) ;
	test fl flgoal flcouns flcounsgoal ;
	matrix r = round(`r(p)', 0.001) ;

	* the first p-value starts the vector and names its column, later ones are appended below it ;
	if `first_row' { ;
		matrix pvals = r ;
		matrix colnames pvals = pvalue ;
		local first_row = 0 ;
	} ;

	else { ;
		matrix pvals = pvals \ r ;
	} ;
} ;

* put the p-values to the right of median, mean and sd as a fourth column ;
matrix summstats = summstats, pvals ;
matrix list summstats ;

* pairwise correlations among the covariates, kept for the finscore column of Table 2 ;
correlate hhsize-finscore ;
matrix corrmatrix = r(C) ;

* output to csv file: one line per row of summstats ;
* columns are the variable name, median, mean, sd, coefficient of variation, ;
* correlation with finscore, and the p-value of the joint test of the treatment dummies ;
shell printf "********** Table 2 **********\n" >> "./output/table-01-08.csv" ;
shell printf "var_name,median,mean,sd,cv,correlation,pval \n" >> "./output/table-01-08.csv" ;
local rownames : rownames summstats ;
local max = rowsof(summstats) ;

forvalues i = 1/`max' { ;
	local var_name: word `i' of `rownames' ;

	* median and cv are reported only for these six variables and are left blank for the others ;
	local is_continuous = inlist("`var_name'", "hhsize", "hhincome", "hhincome_pc", "disc_rate", "mathscore", "finscore") ;

	* start every row with empty cells so that nothing carries over from the previous row ;
	* the earlier macro drop with bare names addressed global macros and left these locals untouched ;
	local median "" ;
	local cv "" ;

	* mean ;
	local mean = round(summstats[`i', 2], 0.01) ;

	* sd ;
	local sd = round(summstats[`i', 3], 0.01) ;

	* median and coefficient of variation, the latter computed from the rounded sd and mean ;
	if `is_continuous' { ;
		local median = round(summstats[`i', 1], 0.01) ;
		local cv = round(`sd'/`mean', 0.01) ;
	} ;

	* correlation of this variable with finscore ;
	* scalar() makes sure the scalar is read even if a variable name could abbreviate to rho ;
	scalar rho = corrmatrix[rownumb(corrmatrix,"finscore"),colnumb(corrmatrix,"`var_name'")] ;
	local rho = round(scalar(rho), 0.01) ;

	* f-stat pvalues ;
	local pval = round(summstats[`i', 4], 0.001) ;

	qui shell printf "`var_name',`median',`mean',`sd', `cv', `rho', `pval' \n" >> "./output/table-01-08.csv" ;
} ;

shell printf "\n" >> "./output/table-01-08.csv" ;

* ***************************************************************************** ;
* pre-treatment control variables
* ***************************************************************************** ;

* controls used in the regressions of Tables 3 and 4 ;
local cvars has_hard_time_saving interested_in_financial inconsistent disc_rate riskaverse ;

* the same controls under the names of their residualised versions, which carry an r prefix ;
local rcvars ;
foreach c of local cvars { ;
	local rcvars `rcvars' r`c' ;
} ;

* ***************************************************************************** ;
* TABLE 3: Average Treatment Effects 
* ***************************************************************************** ;

* each outcome is compared with the control group separately for every treatment arm ;
* arms are numbered 1 fl, 2 flgoal, 3 flcouns, 4 flcounsgoal, and the number is the suffix of the stored estimate ;
foreach y of varlist budget_tried savings_bank loan_purpose_bus_educ ins_life { ;

	local k = 0 ;
	foreach arm in fl flgoal flcouns flcounsgoal { ;
		local k = `k' + 1 ;

		* sample is the arm plus control, strata are absorbed, errors are clustered on wave_class ;
		areg `y' `arm' `cvars' if `arm' | control, absorb(strata) cluster(wave_class) ;
		estimates store `y'`k' ;
	} ;
} ;

* write one panel per arm: the wildcard picks up all outcomes stored with that arm number ;
local csv "./output/table-01-08.csv" ;
shell printf "********** Table 3 **********\n" >> "`csv'" ;

forvalues a = 1/4 { ;
	esttab *`a' using "`csv'",
		append depvars label
		cells(b(star fmt(%9.3f %9.3f)) se(par)) drop(_cons `cvars')
		star(* 0.10 ** 0.05 *** 0.01)
		stats(r2_a N, fmt(%9.3f %9.0f)
		labels("Adj. R-squared" "Observations")) ;

	shell printf "\r\n" >> "`csv'" ;
} ;

* ***************************************************************************** ;
* TABLE 4: Effects on Financial Knowledge
* ***************************************************************************** ;

* discard the Table 3 estimates so that the wildcards below match only the knowledge regressions ;
estimates drop _all ;

* each knowledge score is compared with the control group separately for every treatment arm ;
* arms are numbered 1 fl, 2 flgoal, 3 flcouns, 4 flcounsgoal, and the number is the suffix of the stored estimate ;
foreach y of varlist end_numeracy end_awareness end_attitudes { ;

	local k = 0 ;
	foreach arm in fl flgoal flcouns flcounsgoal { ;
		local k = `k' + 1 ;

		* sample is the arm plus control, strata are absorbed, errors are clustered on wave_class ;
		areg `y' `arm' `cvars' if `arm' | control, absorb(strata) cluster(wave_class) ;
		estimates store `y'`k' ;
	} ;
} ;

* write one panel per arm: the wildcard picks up all scores stored with that arm number ;
local csv "./output/table-01-08.csv" ;
shell printf "********** Table 4 **********\n" >> "`csv'" ;

forvalues a = 1/4 { ;
	esttab *`a' using "`csv'",
		append depvars label
		cells(b(star fmt(%9.3f %9.3f)) se(par)) drop(_cons `cvars' )
		star(* 0.10 ** 0.05 *** 0.01)
		stats(r2_a N, fmt(%9.3f %9.0f)
		labels("Adj. R-squared" "Observations")) ;

	shell printf "\r\n" >> "`csv'" ;
} ;

* ***************************************************************************** ;
* rename variables to make them shorter ;
* ***************************************************************************** ;

* budgeting outcomes ;
rename budget_helpful        bdg_help ;
rename budget_tried          bdg_tried ;
rename budget_regular        bdg_reg ;

* savings outcomes ;
rename savings_informal      sav_inf ;
rename savings_bank          sav_bnk ;

* borrowing and insurance outcomes ;
rename loan_purpose_bus_educ loan_bus ;
* NOTE(review): old and new name are identical here, so this presumably leaves ins_life as it is - confirm ;
rename ins_life              ins_life ;

* ***************************************************************************** ;
* TABLE 5: Causal Mediation: Financial Education Treatment
* ***************************************************************************** ;

* discard the Table 4 estimates so that the wildcard in the output step matches only this table ;
est drop _all ;

* treatment arm and outcome analysed in this table ;
local t fl ;
local outcomes bdg_tried ; 

* remove strata FEs: regress each variable on strata dummies within the arm-plus-control sample ;
* and keep the residual in a new variable named r plus the original name, labelled with the original name ;
* the names listed in rcvars are the residualised controls created here ;
foreach var of varlist `t' `outcomes' `cvars' end_numeracy end_awareness end_attitudes { ;
	reg `var' i.strata if `t' | control ;
	predict r`var', resid ;
	label var r`var' `var' ;
} ;

* loop over all outcomes and mediators ;
* the first equation is the mediator model and the second the outcome model, both on residualised variables ;
* NOTE(review): medeff_FC is not defined in this file, and the esttab call below relies on stored ;
* estimates whose names start with rbdg_tried, presumably stored by medeff_FC - confirm ;
foreach y of local outcomes { ;
	foreach m of varlist end_numeracy end_awareness end_attitudes { ;
		di "*********** treatment: `t', outcome: `y', mediator: `m' ******************* " ;
		medeff_FC	(regress r`m' r`t' `rcvars') 
					(regress r`y' r`t' r`m' `rcvars')
					if `t' | control,
					treat(r`t') mediate(r`m') seed(24533) vce(cluster wave_class) sims(1000) ;
	} ;
} ;

* output ;
shell printf "********** Table 5 **********\n" >> "./output/table-01-08.csv" ;

* fmt() is matched to stats() by position: one format for r2_a, one for N, ;
* then one for each of the sixteen mediation statistics, in the same order as the labels ;
esttab rbdg_tried*  using "./output/table-01-08.csv",
		append msign(-) depvars label
		cells(b(star fmt(%9.3f %9.3f)) se(par)) drop(_cons `rcvars')
		star(* 0.10 ** 0.05 *** 0.01) 
		stats(r2_a N
		acme acme_sd acme_ci_lo acme_ci_hi
		ade ade_sd ade_ci_lo ade_ci_hi
		ate ate_sd ate_ci_lo ate_ci_hi
		pct pct_sd pct_ci_lo pct_ci_hi, 
		fmt(%9.3f %9.0f
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		) 
		labels("Adj. R-squared" "Observations" 
		"ACME" "ACME SD" "ACME CI Low" "ACME CI High"
		"ADE" "ADE SD" "ADE CI Low" "ADE CI High"
		"ATE" "ATE SD" "ATE CI Low" "ATE CI High"
		"Share of ATE Mediated" "Share SD" "Share CI Low" "Share CI High"
		))	;

shell printf "\r\n" >> "./output/table-01-08.csv" ;

* ***************************************************************************** ;
* TABLE 6: Causal Mediation: Financial Education and Goal Setting Treatment
* ***************************************************************************** ;

* discard the previous table's estimates and residualised variables ;
* the range starts at rbdg_tried, so the residualised treatment variable created before it stays in the dataset ;
est drop _all ;
drop rbdg_tried-rend_attitudes ;

* treatment arm and outcomes analysed in this table ;
local t flgoal ;
local outcomes bdg_tried sav_bnk ; 

* remove strata FEs: regress each variable on strata dummies within the arm-plus-control sample ;
* and keep the residual in a new variable named r plus the original name, labelled with the original name ;
* the names listed in rcvars are the residualised controls created here ;
foreach var of varlist `t' `outcomes' `cvars' end_numeracy end_awareness end_attitudes { ;
	reg `var' i.strata if `t' | control ;
	predict r`var', resid ;
	label var r`var' `var' ;
} ;

* loop over all outcomes and mediators ;
* the first equation is the mediator model and the second the outcome model, both on residualised variables ;
* NOTE(review): medeff_FC is not defined in this file, and the esttab call below relies on stored ;
* estimates whose names start with rbdg_tried and rsav_bnk, presumably stored by medeff_FC - confirm ;
foreach y of local outcomes { ;
	foreach m of varlist end_numeracy end_awareness end_attitudes { ;
		di "*********** treatment: `t', outcome: `y', mediator: `m' ******************* " ;
		medeff_FC	(regress r`m' r`t' `rcvars') 
					(regress r`y' r`t' r`m' `rcvars')
					if `t' | control,
					treat(r`t') mediate(r`m') seed(24533) vce(cluster wave_class) sims(1000) ;
	} ;
} ;

* output ;
shell printf "********** Table 6 **********\n" >> "./output/table-01-08.csv" ;

* fmt() is matched to stats() by position: one format for r2_a, one for N, ;
* then one for each of the sixteen mediation statistics, in the same order as the labels ;
esttab rbdg_tried* rsav_bnk* using "./output/table-01-08.csv",
		append msign(-) depvars label
		cells(b(star fmt(%9.3f %9.3f)) se(par)) drop(_cons `rcvars')
		star(* 0.10 ** 0.05 *** 0.01) 
		stats(r2_a N
		acme acme_sd acme_ci_lo acme_ci_hi
		ade ade_sd ade_ci_lo ade_ci_hi
		ate ate_sd ate_ci_lo ate_ci_hi
		pct pct_sd pct_ci_lo pct_ci_hi, 
		fmt(%9.3f %9.0f
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		) 
		labels("Adj. R-squared" "Observations" 
		"ACME" "ACME SD" "ACME CI Low" "ACME CI High"
		"ADE" "ADE SD" "ADE CI Low" "ADE CI High"
		"ATE" "ATE SD" "ATE CI Low" "ATE CI High"
		"Share of ATE Mediated" "Share SD" "Share CI Low" "Share CI High"
		))	;

shell printf "\r\n" >> "./output/table-01-08.csv" ;

* ***************************************************************************** ;
* TABLE 7: Causal Mediation: Financial Education and Counseling Treatment
* ***************************************************************************** ;

* discard the previous table's estimates and residualised variables ;
* the range starts at rbdg_tried, so the residualised treatment variable created before it stays in the dataset ;
est drop _all ;
drop rbdg_tried-rend_attitudes ;

* treatment arm analysed in this table ;
* the outcomes local is set here but the loops below list the outcomes explicitly, ;
* because loan_bus is handled in a separate pass ;
local t flcouns ;
local outcomes bdg_tried sav_bnk loan_bus ins_life ; 

* for outcomes except loan_bus ;
* remove strata FEs: regress each variable on strata dummies within the arm-plus-control sample ;
* and keep the residual in a new variable named r plus the original name, labelled with the original name ;
foreach var of varlist `t' bdg_tried sav_bnk ins_life  `cvars' end_numeracy end_awareness end_attitudes { ;
	reg `var' i.strata if `t' | control ;
	predict r`var', resid ;
	label var r`var' `var' ;
} ;

* loop over all outcomes and mediators ;
* the first equation is the mediator model and the second the outcome model, both on residualised variables ;
* NOTE(review): medeff_FC is not defined in this file, and the esttab call below relies on stored ;
* estimates whose names start with the residualised outcome names, presumably stored by medeff_FC - confirm ;
foreach y of varlist bdg_tried sav_bnk ins_life  { ;
	foreach m of varlist end_numeracy end_awareness end_attitudes { ;
		di "*********** treatment: `t', outcome: `y', mediator: `m' ******************* " ;
		medeff_FC	(regress r`m' r`t' `rcvars') 
					(regress r`y' r`t' r`m' `rcvars')
					if `t' | control,
					treat(r`t') mediate(r`m') seed(24533) vce(cluster wave_class) sims(1000) ;
	} ;
} ;

* for observations with non-missing loan_bus 
* (need to do this separately since the number of observations is fewer for this outcome) ;
* the residualised variables are rebuilt on that smaller sample, while the stored estimates are kept ;
* the range below begins at rfl, which is still in the dataset because the drops at the start of ;
* Tables 6 and 7 begin at rbdg_tried, so it also removes the earlier residualised treatment variables ;
drop rfl-rend_attitudes ;
foreach var of varlist `t' loan_bus `cvars' end_numeracy end_awareness end_attitudes { ;
	reg `var' i.strata if (`t' | control) & !missing(loan_bus) ;
	predict r`var', resid ;
	label var r`var' `var' ;
} ;

* NOTE(review): the residuals above are estimated on rows with non-missing loan_bus, but the ;
* if-condition passed to medeff_FC does not repeat that restriction, so whether the mediator equation ;
* is limited to the same rows depends on how medeff_FC marks its sample - confirm ;
foreach y of varlist loan_bus  { ;
	foreach m of varlist end_numeracy end_awareness end_attitudes { ;
		di "*********** treatment: `t', outcome: `y', mediator: `m' ******************* " ;
		medeff_FC	(regress r`m' r`t' `rcvars') 
					(regress r`y' r`t' r`m' `rcvars')
					if `t' | control,
					treat(r`t') mediate(r`m') seed(24533) vce(cluster wave_class) sims(1000) ;
	} ;
} ;

* output ;
shell printf "********** Table 7 **********\n" >> "./output/table-01-08.csv" ;

* fmt() is matched to stats() by position: one format for r2_a, one for N, ;
* then one for each of the sixteen mediation statistics, in the same order as the labels ;
esttab rbdg_tried* rsav_bnk* rloan_bus* rins_life* using "./output/table-01-08.csv",
		append msign(-) depvars label
		cells(b(star fmt(%9.3f %9.3f)) se(par)) drop(_cons `rcvars')
		star(* 0.10 ** 0.05 *** 0.01) 
		stats(r2_a N
		acme acme_sd acme_ci_lo acme_ci_hi
		ade ade_sd ade_ci_lo ade_ci_hi
		ate ate_sd ate_ci_lo ate_ci_hi
		pct pct_sd pct_ci_lo pct_ci_hi, 
		fmt(%9.3f %9.0f
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		) 
		labels("Adj. R-squared" "Observations" 
		"ACME" "ACME SD" "ACME CI Low" "ACME CI High"
		"ADE" "ADE SD" "ADE CI Low" "ADE CI High"
		"ATE" "ATE SD" "ATE CI Low" "ATE CI High"
		"Share of ATE Mediated" "Share SD" "Share CI Low" "Share CI High"
		))	;

shell printf "\r\n" >> "./output/table-01-08.csv" ;

* ***************************************************************************** ;
* TABLE 8: Causal Mediation: All Three Treatments
* ***************************************************************************** ;

* discard the previous table's estimates and the residualised variables of its loan_bus pass ;
est drop _all ;
drop rflcouns-rend_attitudes ;

* treatment arm and outcomes analysed in this table ;
local t flcounsgoal ;
local outcomes bdg_tried sav_bnk ins_life ; 

* remove strata FEs: regress each variable on strata dummies within the arm-plus-control sample ;
* and keep the residual in a new variable named r plus the original name, labelled with the original name ;
* the names listed in rcvars are the residualised controls created here ;
foreach var of varlist `t' `outcomes' `cvars' end_numeracy end_awareness end_attitudes { ;
	reg `var' i.strata if `t' | control ;
	predict r`var', resid ;
	label var r`var' `var' ;
} ;

* loop over all outcomes and mediators ;
* the first equation is the mediator model and the second the outcome model, both on residualised variables ;
* NOTE(review): medeff_FC is not defined in this file, and the esttab call below relies on stored ;
* estimates whose names start with the residualised outcome names, presumably stored by medeff_FC - confirm ;
foreach y of local outcomes { ;
	foreach m of varlist end_numeracy end_awareness end_attitudes { ;
		di "*********** treatment: `t', outcome: `y', mediator: `m' ******************* " ;
		medeff_FC	(regress r`m' r`t' `rcvars') 
					(regress r`y' r`t' r`m' `rcvars')
					if `t' | control,
					treat(r`t') mediate(r`m') seed(24533) vce(cluster wave_class) sims(1000) ;
	} ;
} ;

* output ;
shell printf "********** Table 8 **********\n" >> "./output/table-01-08.csv" ;

* fmt() is matched to stats() by position: one format for r2_a, one for N, ;
* then one for each of the sixteen mediation statistics, in the same order as the labels ;
esttab rbdg_tried* rsav_bnk* rins_life* using "./output/table-01-08.csv",
		append msign(-) depvars label
		cells(b(star fmt(%9.3f %9.3f)) se(par)) drop(_cons `rcvars')
		star(* 0.10 ** 0.05 *** 0.01) 
		stats(r2_a N
		acme acme_sd acme_ci_lo acme_ci_hi
		ade ade_sd ade_ci_lo ade_ci_hi
		ate ate_sd ate_ci_lo ate_ci_hi
		pct pct_sd pct_ci_lo pct_ci_hi, 
		fmt(%9.3f %9.0f
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		%9.3f %9.3f %9.3f %9.3f 
		) 
		labels("Adj. R-squared" "Observations" 
		"ACME" "ACME SD" "ACME CI Low" "ACME CI High"
		"ADE" "ADE SD" "ADE CI Low" "ADE CI High"
		"ATE" "ATE SD" "ATE CI Low" "ATE CI High"
		"Share of ATE Mediated" "Share SD" "Share CI Low" "Share CI High"
		))	;

shell printf "\r\n" >> "./output/table-01-08.csv" ;

* ***************************************************************************** ;
* get control group means and add them to the bottom of the CSV output file
* ***************************************************************************** ;

* the means are appended to the same CSV that holds the tables ;
local csv "./output/table-01-08.csv" ;

shell printf "********** Control Group Means **********\n" >> "`csv'" ;
shell printf "Variable, Control Group Mean\r\n" >> "`csv'" ;

* one line per outcome and knowledge score: its mean among control rows, rounded to three decimals ;
foreach v of varlist bdg_tried sav_bnk loan_bus ins_life end_numeracy end_awareness end_attitudes { ;
	display "`v'" ;
	summarize `v' if control == 1 ;
	local stat = string(round(`r(mean)', 0.001)) ;

	shell printf "`v', `stat'\r\n" >> "`csv'" ;
} ;

exit ;
